# -*- coding:utf8 -*-
# !/usr/bin/env python

'''
National Enterprise Credit Information Publicity System (Liaoning) crawler.
Maintainer: 黄羽
'''

import re
import urllib2
from bs4 import BeautifulSoup
from utils import kill_captcha
# from scpy.request_util import *
from request_util import *
from parse_util.parse_basesic import parse_basesic
from scpy.logger import get_logger
from table import *
import json
import traceback

# Module-level logger shared by every function in this file; it is created
# from this file's path via the project helper get_logger.
logger = get_logger(__file__)


def download_captcha_and_kill(companyName, province):
    '''
    Download a captcha image, solve it, and run a company search with it.

    :param companyName: company name or keyword submitted as the search term
    :param province: province code; only "ln" is accepted
    :return: the first record of the search-result list on success
             (get_company_info reads its 'pripid' and 'enttype' keys);
             None when the input is rejected or the result list is empty;
             '' when the attempt should be repeated: empty captcha download,
             unusable solver answer, captcha rejected by the site, or no
             result payload found in the response
    :raises Exception: any error raised by the HTTP helper or the captcha
             solver is logged and re-raised unchanged
    '''
    if not (province == "ln" and companyName):
        logger.error('输入的省份错误或公司部存在,你当前输入为,省份：%s,公司或关键字：%s' % (province, companyName))
        return None

    search_page_url = 'http://gsxt.lngs.gov.cn/saicpub/entPublicitySC/entPublicityDC/entPublicity/search/searchmain.jsp'
    img_url_str = r'http://gsxt.lngs.gov.cn/saicpub/commonsSC/loginDC/securityCode.action?tdate=29157'

    get_captcha_headers = {
        'Accept': 'image/webp,image/*,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
        'Host': 'gsxt.lngs.gov.cn',
        'Referer': search_page_url,
    }
    get_captcha_request_util = RequestUtil(get_captcha_headers)

    try:
        img_bin = get_captcha_request_util.make_request(img_url_str).content
    except Exception as e:
        logger.error(e)
        raise  # bare raise keeps the original traceback

    if not img_bin:  # nothing was downloaded; let the caller retry
        return ''

    try:
        res_code = kill_captcha(img_bin, province, "jpeg")
    except Exception:
        logger.exception("破解验证码的服务，出现异常")
        raise

    if not res_code or len(res_code) > 100:
        # No exception is active here, so log with error() rather than
        # exception(), which would append a meaningless "None" traceback.
        logger.error("验证码内容为: \n %s", res_code)
        logger.error("破解验证码的服务，出现异常,可能是下载的验证码错误，也可能破解服务出现异常")
        return ''  # empty string asks the caller to try again

    logger.info('验证码为:%s' % res_code)

    # Headers for the search request that carries the solved captcha.
    search_company_list_headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Origin': 'http://gsxt.lngs.gov.cn',
        'Host': 'gsxt.lngs.gov.cn',
        'Referer': search_page_url,
        'Upgrade-Insecure-Requests': '1',
    }
    search_company_list_request_util = RequestUtil(search_company_list_headers)

    search_company_list_dict = {
        'solrCondition': companyName,
        'authCode': res_code,
    }
    search_company_list_url_str = 'http://gsxt.lngs.gov.cn/saicpub/entPublicitySC/entPublicityDC/lngsSearchFpc.action'
    try:
        search_company_list_res = search_company_list_request_util.make_request(
            search_company_list_url_str,
            method='post',
            data=search_company_list_dict).content
    except Exception as e:
        logger.exception(e)
        raise

    # A rejected captcha shows up either as the site's home page (which
    # contains the first marker) or as an error page (the second marker).
    if re.search('严重违法企业名单', search_company_list_res) or re.search('出错了', search_company_list_res):
        return ''  # empty string asks the caller to try again

    # The result list is embedded as the first argument of a
    # searchList_paging(...) call inside the returned page.
    company_list_json = re.findall(r'searchList_paging\((.*),.*\);', search_company_list_res)
    if not company_list_json:
        return ''

    company_list = json.loads(company_list_json[0])
    if not company_list:
        return None  # the searched company does not exist

    logger.info("搜索到:%s条信息" % len(company_list))
    return company_list[0]


def _fetch_section(request_util, url, params):
    '''GET one endpoint and return the raw response body; failures are logged and re-raised.'''
    try:
        return request_util.make_request(url, method='get', data=params).content
    except Exception as e:
        logger.error(e)
        raise  # bare raise keeps the original traceback


def _load_paging_json(pattern, page_text):
    '''Return the JSON value captured by the first group of pattern in page_text, or [] when absent or empty.'''
    found = re.findall(pattern, page_text)
    if not found:
        logger.info('paging payload not found for pattern: %s', pattern)
        return []
    return json.loads(found[0]) or []


def _parse_year_report(request_util, root_url, report_entry, ent_type):
    '''Download one annual-report detail page and convert it into a report dict.'''
    detail_page = _fetch_section(
        request_util,
        root_url + 'nbDeatil.action?',
        {
            'artId': report_entry['artid'],
            'entType': ent_type,  # enterprise type taken from the search record
        })

    section_tags = BeautifulSoup(detail_page, 'html5lib').find_all(id='qufenkuang')
    report = {}
    if not section_tags:
        # NOTE(review): a report without detail sections yields an empty dict
        # (no 'year' key either) - confirm that consumers expect this.
        return report

    tables = section_tags[0].find_all('table')

    # Enterprise basic information
    report['baseInfo'] = report_index('企业基本信息', str(tables[0]))
    # Website / online shop information
    report['website'] = report_index('网站或网店信息', str(tables[1]))

    # Shareholder and capital contribution records are embedded as the
    # argument of a czPaging(...) call rather than rendered as a table.
    report['investorInformations'] = [
        {
            'subConam': item['lisubconam'],
            'shareholderName': item['inv'],
            'subConType': item['subconformvalue'],
            'subConDate': parse_time(item['subcondatelabel']),
            'paidTime': parse_time(item['accondatelabel']),
            'paidType': item['acconformvalue'],
            'paidConMoney': item['liacconam'],
        }
        for item in _load_paging_json(r'czPaging\((.*)\);', detail_page)
    ]

    # Enterprise asset status
    report['assetsInfo'] = report_index('企业资产状况信息', str(tables[4]))
    # Equity change information
    report['equityChangeInformations'] = report_index('股权变更信息', str(tables[6]))

    # Modification records live in the second 'qufenkuang' section.
    modification_tables = section_tags[1].find_all('table')
    report['changeRecords'] = report_index('修改记录', str(modification_tables[0]))

    report['year'] = report_entry['ancheyear']
    return report


def get_company_info(company_pripid):
    '''
    Download and parse the registration details and annual reports of a company.

    :param company_pripid: search-result record of the company; its 'pripid'
                           and 'enttype' keys are sent with every request
    :return: dict holding the parsed sections (basicList, shareHolderList,
             alterList, personList, abnormalOperation, checkMessage,
             yearReportList, plus sections that are always empty lists),
             or None when company_pripid is falsy
    :raises Exception: request failures are logged and re-raised; unexpected
             page layouts surface as IndexError/KeyError/ValueError
    '''
    if not company_pripid:
        return None

    ent_type = company_pripid['enttype']
    query = {
        'pripid': company_pripid['pripid'],
        'type': ent_type,
    }

    root_url = 'http://gsxt.lngs.gov.cn/saicpub/entPublicitySC/entPublicityDC/'
    company_base_info_headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Host': 'gsxt.lngs.gov.cn',
        'Referer': 'http://gsxt.lngs.gov.cn/saicpub/entPublicitySC/entPublicityDC/sEntDetail.action',
        'X-Requested-With': 'XMLHttpRequest',
    }
    request_util = RequestUtil(company_base_info_headers)

    # Sections this crawler does not populate are still present as empty
    # lists; each key gets its own list object.
    company_info_dict = {}
    for empty_key in ('punishBreakList', 'punishedList', 'alidebtList',
                      'entinvItemList', 'frinvList', 'frPositionList',
                      'filiationList', 'caseInfoList', 'sharesFrostList',
                      'sharesImpawnList', 'morDetailList', 'morguaInfoList',
                      'liquidationList'):
        company_info_dict[empty_key] = []

    # Registration: basic information (first table of the 'jibenxinxi' node)
    base_page = _fetch_section(request_util, root_url + 'getJbxxAction.action?', query)
    base_soup = BeautifulSoup(base_page, 'html5lib')
    basic_table = base_soup.find_all(id='jibenxinxi')[0].find_all('table')[0]
    basic_list = parse_basesic(basic_table)
    if basic_list and isinstance(basic_list, list):
        company_info_dict['basicList'] = basic_list
    else:
        company_info_dict['basicList'] = []

    # Shareholders
    shareholder_page = _fetch_section(request_util, root_url + 'getTzrxxAction.action?', query)
    company_info_dict['shareHolderList'] = [
        {
            'shareholderName': item['inv'],
            'shareholderType': item['invtypeName'],
            'shareHolderdetail': [],
        }
        for item in _load_paging_json(r'tzr_paging\((.*),.*,.*,.*\);', shareholder_page)
    ]

    # Change records
    alter_page = _fetch_section(request_util, root_url + 'getBgxxAction.action?', query)
    company_info_dict['alterList'] = [
        {
            'altDate': parse_time(alter['altdate']),
            'altItem': alter['altitemName'],
            'altBe': alter['altbe'],
            'altAf': alter['altaf'],
        }
        for alter in _load_paging_json(r'paging\((.*),.*\);', alter_page)
    ]

    # Filing information: key personnel
    person_page = _fetch_section(request_util, root_url + 'getZyryxxAction.action?', query)
    company_info_dict['personList'] = [
        {
            'position': person['positionName'],
            'name': person['name'],
            'sex': '',
        }
        for person in _load_paging_json(r'zyry_nz_paging\((.*),.*\);', person_page)
    ]

    # Branches. TODO: map the branch records into filiationList; the page is
    # fetched and decoded, but its records are not translated yet.
    branch_page = _fetch_section(request_util, root_url + 'getFgsxxAction.action?', query)
    _load_paging_json(r'fzjgPaging\((.*),.*\);', branch_page)

    # Abnormal operation records; every record gets its own dict and is
    # appended to the result list.
    abnormal_page = _fetch_section(request_util, root_url + 'getJyycxxAction.action?', query)
    company_info_dict['abnormalOperation'] = [
        {
            'specause': item['specauseName'],
            'abntime': parse_time(item['abnDate']),
            'retime': parse_time(item['remDate']),
            'recause': item['remexcpresName'],
            'decorg': item['lrregorgName'],
        }
        for item in _load_paging_json(r'jyyc_paging\((.*),.*\);', abnormal_page)
    ]

    # Spot-check records; a fresh dict per record so entries do not alias
    # each other.
    check_page = _fetch_section(request_util, root_url + 'getCcjcxxAction.action?', query)
    company_info_dict['checkMessage'] = [
        {
            'institution': item['insauthName'],
            'check_date': parse_time(item['insdateStr']),
            'check_type': item['instypeName'],
            'check_result': item['remark'],
        }
        for item in _load_paging_json(r'ccjc_paging\((.*),.*\);', check_page)
    ]

    # Annual reports: one detail request per listed report
    report_index_page = _fetch_section(request_util, root_url + 'getQygsQynbxxAction.action?', query)
    company_info_dict['yearReportList'] = [
        _parse_year_report(request_util, root_url, report_entry, ent_type)
        for report_entry in _load_paging_json(r'qynbPaging\((.*),.*,.*\);', report_index_page)
    ]

    return company_info_dict


def search(companyName):
    '''
    Look up a company in the Liaoning ("ln") registry.

    The captcha-and-search step is repeated, up to 20 times, for as long as
    it reports a retryable failure (an empty string).

    :param companyName: company name or registration number
    :return: the dict produced by get_company_info when the company is found;
             None when it is not found or every captcha attempt failed
    :raises Exception: errors from the search or parsing step are logged
             with their traceback and re-raised unchanged
    '''
    province = 'ln'
    MAXTIME = 20
    atime = MAXTIME
    company_pripid = ''

    while atime > 0 and company_pripid == '':
        try:
            company_pripid = download_captcha_and_kill(companyName, province)
        except Exception as e:
            # logger.exception records the traceback; the first positional
            # argument of traceback.print_exc is `limit`, not an exception.
            logger.exception(e)
            raise

        if company_pripid == '':
            # Captcha attempt failed in a retryable way.
            atime -= 1
            logger.info("验证码破解失败，重复破解验证码,当前设定次数为:%s ,剩余次数为:%s" % (MAXTIME, atime))
        elif company_pripid is None:
            # Company not found or the input was rejected: stop retrying.
            break
        else:
            logger.info("验证码破解成功！")

    if not company_pripid:
        return None

    # Download and parse the company pages.
    try:
        return get_company_info(company_pripid)
    except Exception as e:
        logger.exception(e)
        raise


if __name__ == "__main__":
    # Manual run: search one company and print the parsed result as JSON.
    # Other names that were used for manual checks:
    #   u'中网联合辽宁科技有限公司'
    #   u'辽宁聚龙股份有限公司'
    #   u'辽宁衡业集团有限公司'
    #   u'营口明大投资有限公司 '
    target_company = u'辽宁远大集团有限公司 '
    company_info = search(target_company)
    print(json.dumps(company_info, indent=4, ensure_ascii=False))
